
version 16.0
clear all
set more off

capture log close
log using "build_panel_log.txt", replace text

* ============================================================
* build_panel.do — Construct analysis panel (data.xlsx) from raw files
*   Inputs (same folder):
*     - CNRDS_green_patents.xlsx
*     - CSMAR_finance_data.xlsx
*     - GovWorkReports_count_summary..xlsx
*   Outputs:
*     - data.xlsx            (analysis-ready panel)
*     - panel_planA.csv      (CSV mirror)
*     - build_panel_log.txt  (text log)
* ============================================================

* -------------------- 0) Helper: rename candidate -> target --------------------
program define _try_rename_if_missing
    * args: target cand1 cand2 ...
    args target
    local found = 0
    capture confirm variable `target'
    if _rc {  // target not present; try candidates
        foreach c of local 0 {
            if "`c'" == "" continue
            capture confirm variable `c'
            if !_rc {
                rename `c' `target'
                local found = 1
                exit
            }
        }
    }
end

* -------------------- 1) CNRDS: green patent applications -> GI --------------------
capture confirm file "CNRDS_green_patents.xlsx"
if _rc {
    di as error "Missing: CNRDS_green_patents.xlsx"
    exit 601
}

import excel using "CNRDS_green_patents.xlsx", firstrow clear

* Standardize firm and year
capture noisily _try_rename_if_missing stkcd Scode Stkcd stockcode 证券代码 股票代码
capture noisily _try_rename_if_missing year Year 年度
capture confirm variable stkcd
if _rc di as error "Cannot find firm code column in CNRDS file."

* Keep listed company itself when available
capture confirm variable Ftyp
if !_rc drop if !ustrregexm(Ftyp, "上市公司本身")

* Ensure app subcolumns exist (create zeros if missing) and numeric
local appcols GreInvia GreUmia GreInvja GreUmja
foreach v of local appcols {
    capture confirm variable `v'
    if _rc gen double `v' = 0
    else destring `v', replace ignore(" ,/")
}
egen app_count_row = rowtotal(GreInvia GreUmia GreInvja GreUmja)
keep stkcd year app_count_row
collapse (sum) app_count = app_count_row, by(stkcd year)

* GI = ln(1 + applications)
gen double GI = ln(1 + app_count)

tempfile cnrds
save `cnrds'

* -------------------- 2) CSMAR: firm-year controls, industry, flags --------------------
capture confirm file "CSMAR_finance_data.xlsx"
if _rc {
    di as error "Missing: CSMAR_finance_data.xlsx"
    exit 602
}
import excel using "CSMAR_finance_data.xlsx", firstrow clear

* Standardize firm/year
capture noisily _try_rename_if_missing stkcd Scode Stkcd stockcode 证券代码 股票代码
capture noisily _try_rename_if_missing year Year 年度

* Try to map common control names if your sheet uses alternatives (add more if needed)
capture noisily _try_rename_if_missing Size size lnassets LnAssets TotalAssets_log
capture noisily _try_rename_if_missing Lev Leverage leverage debt_ratio
capture noisily _try_rename_if_missing ROA roa ROA_ttm
capture noisily _try_rename_if_missing RD rd RnD RDintensity
capture noisily _try_rename_if_missing Board board BoardSize board_size
capture noisily _try_rename_if_missing SOE soe state state_owned

* ST/*ST flags (optional)
capture noisily _try_rename_if_missing 当年是否ST或PT STflag_now ST_now
capture noisily _try_rename_if_missing 样本期是否ST或PT STflag_sample ST_sample

* Industry columns (optional; will be used later for FE or checks)
capture noisily _try_rename_if_missing 行业代码1 IndustryCode Industry1
capture noisily _try_rename_if_missing 行业名称1 IndustryName Industry

tempfile csmar
save `csmar'

* -------------------- 3) Central ESGP, Slack, MEA by year --------------------
capture confirm file "GovWorkReports_count_summary..xlsx"
if _rc {
    di as error "Missing: GovWorkReports_count_summary..xlsx"
    exit 603
}
import excel using "GovWorkReports_count_summary..xlsx", firstrow clear

capture noisily _try_rename_if_missing year Year 年度
capture noisily _try_rename_if_missing ESGP esgp ESG_index
capture noisily _try_rename_if_missing Slack slack SA SlackIndex
capture noisily _try_rename_if_missing MEA mea mediaIndex

keep year ESGP Slack MEA
duplicates drop year, force

tempfile gov
save `gov'

* -------------------- 4) Merge to firm-year panel --------------------
use `csmar', clear

* Standardize types
capture confirm numeric variable year
if _rc destring year, replace ignore(" -,/")
tostring stkcd, replace force

merge 1:1 stkcd year using `cnrds', keep(match master) nogen
merge m:1 year using `gov', keep(match master) nogen

order stkcd year GI ESGP Slack MEA Size Age Lev ROA RD SOE Board 行业代码1 行业名称1 当年是否ST或PT 样本期是否ST或PT, first

* -------------------- 5) Save outputs --------------------
export excel using "data.xlsx", firstrow(variables) sheet("Sheet1") sheetreplace replace
export delimited using "panel_planA.csv", replace

log close
di as result "Done. Created data.xlsx and panel_planA.csv"
